library(leaflet)
library(readxl)
library(dplyr)
library(reticulate)
# Load the Airbnb listings workbook into `maindata`.
# The path is anchored at the user's home directory: the earlier version was
# relative to the working directory and failed with "`path` does not exist"
# when the notebook was run from somewhere else.
data_path <- path.expand(
  "~/Documents/Spring2021/DataVisualization/Project/maindata.xlsx"
)
if (!file.exists(data_path)) {
  # Fail early with the fully expanded location so the fix is obvious.
  stop("Cannot find the listings workbook at: ", data_path, call. = FALSE)
}
maindata <- read_excel(data_path)

# The overview map further down reads `tmp`; start it off as the full data set
# (the per-city sections later overwrite it with one city's rows).
tmp <- maindata
Let's look at how many unique values each column has.
# Number of distinct values in every column of `maindata`, as a named vector.
# vapply() pins the result to one integer per column, so the return type does
# not depend on the shape of the input the way sapply()'s does.
vapply(maindata, function(column) length(unique(column)), integer(1))
id log_price property_type room_type amenities
74111 767 35 3 67122
accommodates bathrooms bed_type cancellation_policy cleaning_fee
16 18 5 5 2
city Description first_review host_has_profile_pic host_identity_verified
6 73469 2555 3 3
host_response_rate host_since instant_bookable last_review lat
81 3088 2 1372 74058
long name neighbourhood number_of_reviews review_scores_rating
73973 73331 620 371 55
thumbnail_url zipcode bedrooms beds
65884 669 12 19
library(ggplot2)
# Box plot of log_price, one box per cancellation policy.
ggplot(maindata, aes(x = cancellation_policy, y = log_price)) +
  geom_boxplot() +
  ggtitle("Boxplot for log_price vs cancellation policy")



Do some hypothesis testing.

First, let us plot the different properties, using the latitude and longitude information given in our dataset.
# Overview map: one red, outline-free circle marker per listing, placed by its
# longitude/latitude.
# Plot from `maindata` directly so the overview always shows every listing,
# regardless of what the per-city sections have left in `tmp`.
m <- leaflet(maindata) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(~long, ~lat, color = "red", stroke = FALSE)
m
From the map above, we can see that the properties listed in our dataset are from six different locations: Los Angeles, New York, DC, Boston, Chicago, and San Francisco.
log_price for the different cities.
NYC
# NYC: map every listing, with marker radius and fill colour driven by
# log_price.
# Rows whose city is missing are excluded explicitly; a bare `==` yields NA for
# them, and indexing with NA pulls in all-NA rows.
rows <- !is.na(maindata$city) & maindata$city == "NYC"
tmp <- maindata[rows, ]

# Start from the 1.5-wide bins (2, 3.5, 5, 6.5, 8) and add an outer break on
# either side only when the data reach beyond them. With the fixed bins alone
# leaflet warned "Some values were outside the color scale and will be treated
# as NA", and those listings were drawn in the transparent NA colour.
mybins <- seq(2, 8, by = 1.5)
price_range <- range(tmp$log_price, na.rm = TRUE)
if (all(is.finite(price_range))) {
  lower_break <- min(mybins[1], floor(price_range[1]))
  upper_break <- max(mybins[length(mybins)], ceiling(price_range[2]))
  # unique() drops the extra break again when the data already fit in [2, 8].
  mybins <- unique(c(lower_break, mybins, upper_break))
}

mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

m <- leaflet(tmp) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(
    ~long, ~lat,
    radius = ~log_price,
    fillColor = ~mypalette(log_price),
    fillOpacity = 0.5,
    color = "white",
    stroke = FALSE
  ) %>%
  addLegend(
    pal = mypalette, values = ~log_price, opacity = 0.9,
    title = "Log_price", position = "bottomright"
  )
m
Talk about which parts of NYC have the highest log_price and which have the lowest.
Boston.
# Boston: keep only the Boston listings and map them, sizing and colouring
# each marker by log_price.
rows <- maindata$city == "Boston"
tmp <- maindata[rows, ]

# Colour breaks at 2, 3.5, 5, 6.5 and 8 on the log_price scale.
mybins <- seq(from = 2, to = 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

# Build the map one layer at a time: base tiles, provider tiles, markers,
# then the colour legend.
m <- leaflet(tmp)
m <- addTiles(m)
m <- addProviderTiles(m, "OpenStreetMap.BZH")
m <- addCircleMarkers(
  m,
  lng = ~long,
  lat = ~lat,
  radius = ~log_price,
  fillColor = ~mypalette(log_price),
  fillOpacity = 0.5,
  color = "white",
  stroke = FALSE
)
m <- addLegend(
  m,
  pal = mypalette,
  values = ~log_price,
  opacity = 0.9,
  title = "Log_price",
  position = "bottomright"
)
m
DC
# DC: keep only the DC listings and map them, sizing and colouring each
# marker by log_price.
rows <- maindata$city == "DC"
tmp <- maindata[rows, ]

# Colour breaks at 2, 3.5, 5, 6.5 and 8 on the log_price scale.
mybins <- seq(from = 2, to = 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

# Build the map one layer at a time: base tiles, provider tiles, markers,
# then the colour legend.
m <- leaflet(tmp)
m <- addTiles(m)
m <- addProviderTiles(m, "OpenStreetMap.BZH")
m <- addCircleMarkers(
  m,
  lng = ~long,
  lat = ~lat,
  radius = ~log_price,
  fillColor = ~mypalette(log_price),
  fillOpacity = 0.5,
  color = "white",
  stroke = FALSE
)
m <- addLegend(
  m,
  pal = mypalette,
  values = ~log_price,
  opacity = 0.9,
  title = "Log_price",
  position = "bottomright"
)
m
San Francisco.
# San Francisco: keep only the SF listings and map them, sizing and colouring
# each marker by log_price.
rows <- maindata$city == "SF"
tmp <- maindata[rows, ]

# Colour breaks at 2, 3.5, 5, 6.5 and 8 on the log_price scale.
mybins <- seq(from = 2, to = 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

# Build the map one layer at a time: base tiles, provider tiles, markers,
# then the colour legend.
m <- leaflet(tmp)
m <- addTiles(m)
m <- addProviderTiles(m, "OpenStreetMap.BZH")
m <- addCircleMarkers(
  m,
  lng = ~long,
  lat = ~lat,
  radius = ~log_price,
  fillColor = ~mypalette(log_price),
  fillOpacity = 0.5,
  color = "white",
  stroke = FALSE
)
m <- addLegend(
  m,
  pal = mypalette,
  values = ~log_price,
  opacity = 0.9,
  title = "Log_price",
  position = "bottomright"
)
m
LA
# LA: keep only the LA listings and map them, sizing and colouring each
# marker by log_price.
rows <- maindata$city == "LA"
tmp <- maindata[rows, ]

# Colour breaks at 2, 3.5, 5, 6.5 and 8 on the log_price scale.
mybins <- seq(from = 2, to = 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

# Build the map one layer at a time: base tiles, provider tiles, markers,
# then the colour legend.
m <- leaflet(tmp)
m <- addTiles(m)
m <- addProviderTiles(m, "OpenStreetMap.BZH")
m <- addCircleMarkers(
  m,
  lng = ~long,
  lat = ~lat,
  radius = ~log_price,
  fillColor = ~mypalette(log_price),
  fillOpacity = 0.5,
  color = "white",
  stroke = FALSE
)
m <- addLegend(
  m,
  pal = mypalette,
  values = ~log_price,
  opacity = 0.9,
  title = "Log_price",
  position = "bottomright"
)
m
We can see that high-priced properties are located along the edge.
Chicago
# Chicago: keep only the Chicago listings and map them, sizing and colouring
# each marker by log_price.
rows <- maindata$city == "Chicago"
tmp <- maindata[rows, ]

# Colour breaks at 2, 3.5, 5, 6.5 and 8 on the log_price scale.
mybins <- seq(from = 2, to = 8, by = 1.5)
mypalette <- colorBin(
  palette = "YlOrBr",
  domain = tmp$log_price,
  na.color = "transparent",
  bins = mybins
)

# Build the map one layer at a time: base tiles, provider tiles, markers,
# then the colour legend.
m <- leaflet(tmp)
m <- addTiles(m)
m <- addProviderTiles(m, "OpenStreetMap.BZH")
m <- addCircleMarkers(
  m,
  lng = ~long,
  lat = ~lat,
  radius = ~log_price,
  fillColor = ~mypalette(log_price),
  fillOpacity = 0.5,
  color = "white",
  stroke = FALSE
)
m <- addLegend(
  m,
  pal = mypalette,
  values = ~log_price,
  opacity = 0.9,
  title = "Log_price",
  position = "bottomright"
)
m
Types of property
# Chicago listings coloured by property type; marker radius still encodes
# log_price.
rows <- maindata$city == "Chicago"
tmp <- maindata[rows, ]

# property_type is categorical, so the palette must come from colorFactor().
# The earlier colorBin() call cut its input into numeric bins and stopped with
# "Error in cut.default(...): 'x' must be numeric". The numeric bin breaks are
# not needed for a factor palette and are no longer computed here.
mypalette <- colorFactor(
  palette = "YlOrBr",
  domain = tmp$property_type,
  na.color = "transparent"
)

m <- leaflet(tmp) %>%
  addTiles() %>%
  addProviderTiles("OpenStreetMap.BZH") %>%
  addCircleMarkers(
    ~long, ~lat,
    radius = ~log_price,
    fillColor = ~mypalette(property_type),
    fillOpacity = 0.5,
    color = "white",
    stroke = FALSE
  ) %>%
  addLegend(
    pal = mypalette, values = ~property_type, opacity = 0.9,
    title = "Property type", position = "bottomright"
  )
# Display the finished map.
m
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpsaWJyYXJ5KGxlYWZsZXQpCmxpYnJhcnkocmVhZHhsKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHJldGljdWxhdGUpCmBgYAoKCmBgYHtyfQptYWluZGF0YSA8LSByZWFkX2V4Y2VsKCJ+L0RvY3VtZW50cy9TcHJpbmcyMDIxL0RhdGFWaXN1YWxpemF0aW9uL1Byb2plY3QvbWFpbmRhdGEueGxzeCIpCnRtcD1tYWluZGF0YQpgYGAKCkxldCBsb29rIGF0IGhvdyBtYW55IHVuaXF1ZSB2YWx1ZXMgZWFjaCBjb2x1bW4gaGFzCgpgYGB7cn0Kc2FwcGx5KG1haW5kYXRhLCBmdW5jdGlvbih4KSBsZW5ndGgodW5pcXVlKHgpKSkKYGBgCgpgYGB7cn0KbWFpbmRhdGEgJT4lIGdyb3VwX2J5KGNhbmNlbGxhdGlvbl9wb2xpY3kpICU+JSBzZWxlY3QoY2FuY2VsbGF0aW9uX3BvbGljeSklPiUgdW5pcXVlKCkKYGBgCmBgYHtyfQpjb3VudChtYWluZGF0YSwgY2FuY2VsbGF0aW9uX3BvbGljeSkgJT4lIG11dGF0ZShyZWxhdGl2ZV9mcmVxPShuL3N1bShuKSkpCmBgYAoKCmBgYHtyfQptYWluZGF0YSAlPiUgZ3JvdXBfYnkoYmVkX3R5cGUpICU+JSBzZWxlY3QoYmVkX3R5cGUpJT4lIHVuaXF1ZSgpCmBgYAoKCmBgYHtyfQptYWluZGF0YSAlPiUgZmlsdGVyKGNpdHk9PSJOWUMiKSAlPiUgc2VsZWN0KGJlZF90eXBlKSAlPiUgY291bnQoYmVkX3R5cGUpICU+JSBtdXRhdGUocmVsYXRpdmVfZnJlcT0obi9zdW0obikpKQpgYGAKCgpgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQptYWluZGF0YSU+JSBnZ3Bsb3QoIGFlcyh4PWNhbmNlbGxhdGlvbl9wb2xpY3ksIHk9bG9nX3ByaWNlKSkrIGdlb21fYm94cGxvdCgpKyBnZ3RpdGxlKCAiQm94cGxvdCBmb3IgbG9nX3ByaWNlIHZzIGNhbmNlbGxhdGlvbiBwb2xpY3kiKQpgYGAKCmBgYHtyfQpnZ3Bsb3QobWFpbmRhdGEsIGFlcyh4PWJlZF90eXBlLCB5PWxvZ19wcmljZSkpKyBnZW9tX2JveHBsb3QoKSsgZ2d0aXRsZSggIkJveHBsb3QgZm9yIGxvZ19wcmljZSB2cyBiZWQgdHlwZSIpCmBgYAoKYGBge3J9CmdncGxvdChtYWluZGF0YSwgYWVzKHg9Y2xlYW5pbmdfZmVlLCB5PWxvZ19wcmljZSkpKyBnZW9tX2JveHBsb3QoKSsgZ2d0aXRsZSggIkJveHBsb3QgZm9yIGxvZ19wcmljZSB2cyBjbGVhbmluZyBmZWUiKQpgYGAKRG8gc29tZSBoeXBvdGhlc2lzIHRlc3RpbmcuCgpgYGB7cn0KdmNkOjptb3NhaWMofmNsZWFuaW5nX2ZlZStpbnN0YW50X2Jvb2thYmxlLCBkYXRhPW1haW5kYXRhLCBzaGFkZT1UUlVFKQpgYGAKCgpGaXJzdCwgbGV0IHVzIHBsb3QgdGhlIGRpZmZlcmVudCBwcm9wZXJ0aWVzLCB1c2luZyB0aGUgbGF0aXR1ZGUgYW5kIGxvbmdpdHVkZSBpbmZvcm1hdGlvbiBnaXZlbiBpbiBvdXIgZGF0YXNldC4gCmBgYHtyfQptIDwtIGxlYWZsZXQodG1wKSAlPiUKYWRkVGlsZXMoKSAlPiUKYWRkUHJvdmlkZXJUaWxlcygiT3BlblN0cmVldE1hcC5CWkgiKSAlPiUKYWRkQ2lyY2xlTWFya2Vycyh+bG9uZywgfmxh
dCwgY29sb3IgPSAicmVkIiwKc3Ryb2tlPUZBTFNFKQptCmBgYApGcm9tIHRoZSBtYXAgYWJvdmUsIHdlIGNhbiBzZWUgdGhhdCB0aGUgcHJvcGVydGllcyBsaXN0ZWQgaW4gb3VyIGRhdHNldCBhcmUgZnJvbSBzaXggZGlmZmVyZW50IGxvY2F0aW9uczogTG9zIEFuZ2VsZXMsIE5ldyBZb3JrLCBEQywgQm9zdG9uLCBDaGljYWdvLCBTYW4gRnJhbnNpY28gZXRjLiAKCgojIyBsb2dfcHJpY2UgZm9yIHRoIGRpZmZlcmVudCBjaXRpZXMuIAoKCiMjIyBOWUMgCgpgYGB7cn0KCnJvd3MgPSAobWFpbmRhdGEkY2l0eT09ICJOWUMiKSAKdG1wID0gbWFpbmRhdGFbcm93cywgXQoKbXliaW5zIDwtIHNlcSgyLCA4LCBieT0xLjUpCm15cGFsZXR0ZSA8LSBjb2xvckJpbiggcGFsZXR0ZT0iWWxPckJyIiwKZG9tYWluPXRtcCRsb2dfcHJpY2UsCm5hLmNvbG9yPSJ0cmFuc3BhcmVudCIsCmJpbnM9bXliaW5zKQoKbSA8LSBsZWFmbGV0KHRtcCkgJT4lCmFkZFRpbGVzKCkgJT4lCmFkZFByb3ZpZGVyVGlsZXMoIk9wZW5TdHJlZXRNYXAuQlpIIikgJT4lCmFkZENpcmNsZU1hcmtlcnMofmxvbmcsIH5sYXQscmFkaXVzID0gfmxvZ19wcmljZSwKZmlsbENvbG9yID0gfm15cGFsZXR0ZShsb2dfcHJpY2UpLApmaWxsT3BhY2l0eSA9IDAuNSwKY29sb3IgPSAid2hpdGUiLHN0cm9rZT1GQUxTRQopJT4lCmFkZExlZ2VuZCggcGFsPW15cGFsZXR0ZSwgdmFsdWVzPX5sb2dfcHJpY2UsIG9wYWNpdHk9MC45LAp0aXRsZSA9ICJMb2dfcHJpY2UiLCBwb3NpdGlvbiA9ICJib3R0b21yaWdodCIgKQptCmBgYAoKVGFsayBhYm91dCB3aGljaCBwYXJ0IG9mIE5ZQyBoYXMgaGlnaGVyIGxvZ19wcmljZSBhbmQgd2hpY2ggcGFydCBoYXMgbG93ZXN0LiAKCiMjIyBCb3N0b24uCgpgYGB7cn0KCnJvd3MgPSAobWFpbmRhdGEkY2l0eT09ICJCb3N0b24iKSAKdG1wID0gbWFpbmRhdGFbcm93cywgXQoKbXliaW5zIDwtIHNlcSgyLCA4LCBieT0xLjUpCm15cGFsZXR0ZSA8LSBjb2xvckJpbiggcGFsZXR0ZT0iWWxPckJyIiwKZG9tYWluPXRtcCRsb2dfcHJpY2UsCm5hLmNvbG9yPSJ0cmFuc3BhcmVudCIsCmJpbnM9bXliaW5zKQoKbSA8LSBsZWFmbGV0KHRtcCkgJT4lCmFkZFRpbGVzKCkgJT4lCmFkZFByb3ZpZGVyVGlsZXMoIk9wZW5TdHJlZXRNYXAuQlpIIikgJT4lCmFkZENpcmNsZU1hcmtlcnMofmxvbmcsIH5sYXQscmFkaXVzID0gfmxvZ19wcmljZSwKZmlsbENvbG9yID0gfm15cGFsZXR0ZShsb2dfcHJpY2UpLApmaWxsT3BhY2l0eSA9IDAuNSwKY29sb3IgPSAid2hpdGUiLHN0cm9rZT1GQUxTRQopJT4lCmFkZExlZ2VuZCggcGFsPW15cGFsZXR0ZSwgdmFsdWVzPX5sb2dfcHJpY2UsIG9wYWNpdHk9MC45LAp0aXRsZSA9ICJMb2dfcHJpY2UiLCBwb3NpdGlvbiA9ICJib3R0b21yaWdodCIgKQptCmBgYAoKCgojIyMgREMKCmBgYHtyfQoKcm93cyA9IChtYWluZGF0YSRjaXR5PT0gIkRDIikgCnRtcCA9IG1haW5kYXRhW3Jvd3MsIF0KCm15YmlucyA8LSBzZXEoMiwgOCwgYnk9MS41
KQpteXBhbGV0dGUgPC0gY29sb3JCaW4oIHBhbGV0dGU9IllsT3JCciIsCmRvbWFpbj10bXAkbG9nX3ByaWNlLApuYS5jb2xvcj0idHJhbnNwYXJlbnQiLApiaW5zPW15YmlucykKCm0gPC0gbGVhZmxldCh0bXApICU+JQphZGRUaWxlcygpICU+JQphZGRQcm92aWRlclRpbGVzKCJPcGVuU3RyZWV0TWFwLkJaSCIpICU+JQphZGRDaXJjbGVNYXJrZXJzKH5sb25nLCB+bGF0LHJhZGl1cyA9IH5sb2dfcHJpY2UsCmZpbGxDb2xvciA9IH5teXBhbGV0dGUobG9nX3ByaWNlKSwKZmlsbE9wYWNpdHkgPSAwLjUsCmNvbG9yID0gIndoaXRlIixzdHJva2U9RkFMU0UKKSU+JQphZGRMZWdlbmQoIHBhbD1teXBhbGV0dGUsIHZhbHVlcz1+bG9nX3ByaWNlLCBvcGFjaXR5PTAuOSwKdGl0bGUgPSAiTG9nX3ByaWNlIiwgcG9zaXRpb24gPSAiYm90dG9tcmlnaHQiICkKbQpgYGAKCgojIyMgIFNhbiBGcmFuY2lzY28uIAoKYGBge3J9Cgpyb3dzID0gKG1haW5kYXRhJGNpdHk9PSAiU0YiKSAKdG1wID0gbWFpbmRhdGFbcm93cywgXQoKbXliaW5zIDwtIHNlcSgyLCA4LCBieT0xLjUpCm15cGFsZXR0ZSA8LSBjb2xvckJpbiggcGFsZXR0ZT0iWWxPckJyIiwKZG9tYWluPXRtcCRsb2dfcHJpY2UsCm5hLmNvbG9yPSJ0cmFuc3BhcmVudCIsCmJpbnM9bXliaW5zKQoKbSA8LSBsZWFmbGV0KHRtcCkgJT4lCmFkZFRpbGVzKCkgJT4lCmFkZFByb3ZpZGVyVGlsZXMoIk9wZW5TdHJlZXRNYXAuQlpIIikgJT4lCmFkZENpcmNsZU1hcmtlcnMofmxvbmcsIH5sYXQscmFkaXVzID0gfmxvZ19wcmljZSwKZmlsbENvbG9yID0gfm15cGFsZXR0ZShsb2dfcHJpY2UpLApmaWxsT3BhY2l0eSA9IDAuNSwKY29sb3IgPSAid2hpdGUiLHN0cm9rZT1GQUxTRQopJT4lCmFkZExlZ2VuZCggcGFsPW15cGFsZXR0ZSwgdmFsdWVzPX5sb2dfcHJpY2UsIG9wYWNpdHk9MC45LAp0aXRsZSA9ICJMb2dfcHJpY2UiLCBwb3NpdGlvbiA9ICJib3R0b21yaWdodCIgKQptCmBgYAoKCiMjIyBMQQoKYGBge3J9Cgpyb3dzID0gKG1haW5kYXRhJGNpdHk9PSAiTEEiKSAKdG1wID0gbWFpbmRhdGFbcm93cywgXQoKbXliaW5zIDwtIHNlcSgyLCA4LCBieT0xLjUpCm15cGFsZXR0ZSA8LSBjb2xvckJpbiggcGFsZXR0ZT0iWWxPckJyIiwKZG9tYWluPXRtcCRsb2dfcHJpY2UsCm5hLmNvbG9yPSJ0cmFuc3BhcmVudCIsCmJpbnM9bXliaW5zKQoKbSA8LSBsZWFmbGV0KHRtcCkgJT4lCmFkZFRpbGVzKCkgJT4lCmFkZFByb3ZpZGVyVGlsZXMoIk9wZW5TdHJlZXRNYXAuQlpIIikgJT4lCmFkZENpcmNsZU1hcmtlcnMofmxvbmcsIH5sYXQscmFkaXVzID0gfmxvZ19wcmljZSwKZmlsbENvbG9yID0gfm15cGFsZXR0ZShsb2dfcHJpY2UpLApmaWxsT3BhY2l0eSA9IDAuNSwKY29sb3IgPSAid2hpdGUiLHN0cm9rZT1GQUxTRQopJT4lCmFkZExlZ2VuZCggcGFsPW15cGFsZXR0ZSwgdmFsdWVzPX5sb2dfcHJpY2UsIG9wYWNpdHk9MC45LAp0aXRsZSA9ICJMb2dfcHJpY2UiLCBwb3NpdGlvbiA9ICJib3R0b21yaWdodCIg
KQptCmBgYAoKCldlIGNhbiBzZWUgdGhhdCBhbG9uZyB0aGUgZWRnZSB0aGVyZSBhcmUgcHJvcGVydGllcyB0aGF0IGFyZSBoaWdoIHByaWNlZC4gCgoKIyMjIENoaWNhZ28KCmBgYHtyfQoKcm93cyA9IChtYWluZGF0YSRjaXR5PT0gIkNoaWNhZ28iKSAKdG1wID0gbWFpbmRhdGFbcm93cywgXQoKbXliaW5zIDwtIHNlcSgyLCA4LCBieT0xLjUpCm15cGFsZXR0ZSA8LSBjb2xvckJpbiggcGFsZXR0ZT0iWWxPckJyIiwKZG9tYWluPXRtcCRsb2dfcHJpY2UsCm5hLmNvbG9yPSJ0cmFuc3BhcmVudCIsCmJpbnM9bXliaW5zKQoKbSA8LSBsZWFmbGV0KHRtcCkgJT4lCmFkZFRpbGVzKCkgJT4lCmFkZFByb3ZpZGVyVGlsZXMoIk9wZW5TdHJlZXRNYXAuQlpIIikgJT4lCmFkZENpcmNsZU1hcmtlcnMofmxvbmcsIH5sYXQscmFkaXVzID0gfmxvZ19wcmljZSwKZmlsbENvbG9yID0gfm15cGFsZXR0ZShsb2dfcHJpY2UpLApmaWxsT3BhY2l0eSA9IDAuNSwKY29sb3IgPSAid2hpdGUiLHN0cm9rZT1GQUxTRQopJT4lCmFkZExlZ2VuZCggcGFsPW15cGFsZXR0ZSwgdmFsdWVzPX5sb2dfcHJpY2UsIG9wYWNpdHk9MC45LAp0aXRsZSA9ICJMb2dfcHJpY2UiLCBwb3NpdGlvbiA9ICJib3R0b21yaWdodCIgKQptCmBgYAoKIyMgVHlwZXMgb2YgcHJvcGVydHkgCgpgYGB7cn0Kcm93cyA9IChtYWluZGF0YSRjaXR5PT0gIkNoaWNhZ28iKSAKdG1wID0gbWFpbmRhdGFbcm93cywgXQoKbXliaW5zIDwtIHNlcSgyLCA4LCBieT0xLjUpCm15cGFsZXR0ZSA8LSBjb2xvckJpbiggcGFsZXR0ZT0iWWxPckJyIiwKZG9tYWluPXRtcCRwcm9wZXJ0eV90eXBlLApuYS5jb2xvcj0idHJhbnNwYXJlbnQiLApiaW5zPW15YmlucykKCm0gPC0gbGVhZmxldCh0bXApICU+JQphZGRUaWxlcygpICU+JQphZGRQcm92aWRlclRpbGVzKCJPcGVuU3RyZWV0TWFwLkJaSCIpICU+JQphZGRDaXJjbGVNYXJrZXJzKH5sb25nLCB+bGF0LHJhZGl1cyA9IH5sb2dfcHJpY2UsCmZpbGxDb2xvciA9IH5teXBhbGV0dGUocHJvcGVydHlfdHlwZSksCmZpbGxPcGFjaXR5ID0gMC41LApjb2xvciA9ICJ3aGl0ZSIsc3Ryb2tlPUZBTFNFCiklPiUKYWRkTGVnZW5kKCBwYWw9bXlwYWxldHRlLCB2YWx1ZXM9fnByb3BlcnR5X3R5cGUsIG9wYWNpdHk9MC45LAp0aXRsZSA9ICJQcm9wZXJ0eSB0eXBlIiwgcG9zaXRpb24gPSAiYm90dG9tcmlnaHQiICkKbQptYXgobWFpbmRhdGEkbG9nX3ByaWNlKQpgYGAKCg==